/* Appendix Table 4 */

#delimit ;
clear all;
set more off;

* stem of the text file that the estout call further down appends to;
local outfile "GWgapr10_indivHLFS_het";

* write the current date and time to the log;
display _newline "$S_DATE $S_TIME";








********************************************************************************;
* generating quartiles of CCK worker FE estimated pooling genders;
********************************************************************************;

* load the worker FE extract and bin wfe_t into four quantile groups;
* the result is saved to disk so it can be merged onto the analysis file;

use GWgap_2wFE_extract_v4_wfe_t, clear;

xtile wfe_t_xtile = wfe_t, nquantiles(4);
label variable wfe_t_xtile "Quartiles of CCK worker FE estd pooling genders";

save temp_wfequart, replace;





********************************************************************************;
* preparing data;
********************************************************************************;

use GWgap_pr_HLFS_v4, clear;


* merging in CCK fe quartiles estd pooling genders;
* rows of the master file without a match are kept and no _merge variable is created;

merge m:1 snz_uid using temp_wfequart, nogenerate keep(master match);


* group identifiers for each combination of year with another categorical variable;

egen agecat_yr = group(age_cat year);
egen occ3d_yr = group(occ3d_HLFS year);
egen eth_yr = group(eth_gp year);
egen hqual_yr = group(hqual year);
egen rc_yr = group(rc_HLFS year);
egen hhcomp_yr = group(hhcomp_HLFS year);
egen numkid_yr = group(numkid_u18_cat year);
egen ind4_yr = group(ind4 year);

label variable agecat_yr "Groups defined by age category and financial year";
label variable occ3d_yr "Groups defined by 3d occupation and financial year";
label variable eth_yr "Groups defined by ethnicity combination (15 categories) and financial year";
label variable hqual_yr "Groups defined by highest qualification and financial year";
label variable rc_yr "Groups defined by Regional Council and financial year";
label variable hhcomp_yr "Groups defined by family composition and financial year";
label variable numkid_yr "Groups defined by number of kids parented and financial year";
label variable ind4_yr "Groups defined by 4-digit ANZSIC industry and financial year";


* year-specific copies of age, age2, KL_rat and lnL_hc;
* each copy equals the variable in the named year and zero in every other year;

forvalues yr = 2008/2016 {;
	foreach x in age age2 KL_rat lnL_hc {;
		gen y`yr'_`x' = (year==`yr')*`x';
	};
};

* the regression controls and the indicate list refer to the industry code as ind4u;

rename ind4 ind4u;




*** making heterogeneity variables;
* each block below multiplies female by a characteristic or by a category dummy;

* heterogeneity by age;

gen age_f = age*female;
label variable age_f "Age * Female";
gen age2_f = female*age2;
label variable age2_f "Age squared (/100) * Female";

* age category 2 gets no interaction and so acts as the reference group;
gen age1_f = female*(age_cat==1);
gen age3_f = female*(age_cat==3);
gen age4_f = female*(age_cat==4);
label variable age1_f "Aged under 25 * Female";
label variable age3_f "Aged 40 to 54 * Female";
label variable age4_f "Aged 55+ * Female";


* heterogeneity by tenure;

gen ten1_f = female * wkd_hpp_1ya;
gen ten2_f = female * wkd_hpp_2ya;
label variable ten1_f "Worked for highest paying pent in previous year * Female";
label variable ten2_f "Worked for highest paying pent for last two years * Female";

* heterogeneity by ethnicity (most common 5 combinations and other only);
* eth_ct counts how many of the six ethnicity flags are set for the person;

gen eth_ct = eth_eur + eth_mao + eth_pac + eth_asi + eth_mel + eth_oth;

foreach e in eur mao pac asi {;
	gen eth_`e'_f = (eth_`e'==1 & eth_ct==1)*female;
};
label variable eth_eur_f "European ethnicity only * Female";
label variable eth_mao_f "Maori ethnicity only * Female";
label variable eth_pac_f "Pacific ethnicity only * Female";
label variable eth_asi_f "Asian ethnicity only * Female";

gen eth_eur_mao_f = (eth_eur==1 & eth_mao==1 & eth_ct==2)*female;
label variable eth_eur_mao_f "European and Maori ethnicity only * Female";

* residual group is female minus the five named interactions;
gen eth_othpl_f = female - eth_eur_f - eth_mao_f - eth_pac_f - eth_asi_f - eth_eur_mao_f;
label variable eth_othpl_f "Other ethnicity combination * Female";


* heterogeneity by highest qual;

gen hqual_1_f = (hqual==1)*female;
gen hqual_2_f = (hqual==2)*female;
gen hqual_3_f = (hqual==3)*female;
label variable hqual_1_f "School qualifications * Female";
label variable hqual_2_f "Post-school qualifications * Female";
label variable hqual_3_f "Degree * Female";


* heterogeneity by household composition;

gen hhcomp_cou_f = female*(hhcomp_HLFS==2);
gen hhcomp_sngch_f = female*(hhcomp_HLFS==3);
gen hhcomp_couch_f = female*(hhcomp_HLFS==4);
gen hhcomp_unid_f = female*(hhcomp_HLFS==9);
label variable hhcomp_cou_f "Couple, no children * Female";
label variable hhcomp_sngch_f "Single, with children * Female";
label variable hhcomp_couch_f "Couple, with children * Female";
label variable hhcomp_unid_f "Unidentifiable household composition * Female";

* heterogeneity by number of children;

gen numkid_u18_1_f = (numkid_u18_cat==1)*female;
gen numkid_u18_2_f = (numkid_u18_cat==2)*female;
gen numkid_u18_3_f = (numkid_u18_cat==3)*female;
label variable numkid_u18_1_f "Parent to 1 child under 18 * Female";
label variable numkid_u18_2_f "Parent to 2 children under 18 * Female";
label variable numkid_u18_3_f "Parent to 3+ children under 18 * Female";

* heterogeneity by firm size;

gen lnL_hc_f = female*lnL_hc;
label variable lnL_hc_f "Pent average head count (ln) * Female";

* heterogeneity by wfe quartile;
* factor-variable base for i.wfe_t_xtile is quartile 4;
* the female interactions are built for quartiles 2 to 4 only;

fvset base 4 wfe_t_xtile;

label define wfe_t_xtile
	1 "Quartile 1 of CCK worker FE distribution"
	2 "Quartile 2"
	3 "Quartile 3"
	4 "Quartile 4";
label values wfe_t_xtile wfe_t_xtile;

foreach q of numlist 2/4 {;
	gen wfe_t_xtile_`q'_f = (wfe_t_xtile==`q')*female;
	label variable wfe_t_xtile_`q'_f "Worker FE quartile `q' * Female";
};


* natural logs of the two variables below, labelled with the source label plus (ln);

foreach v in max_fte_employee_av hrs_main_HLFS {;
	local vlab : variable label `v';
	gen ln`v' = ln(`v');
	label variable ln`v' "`vlab' (ln)";
};




********************************************************************************;
* creating macros;
********************************************************************************;


* dependent variable and its variable label;

local dvar lnIDI_earnhp; 
local dvarl: var label `dvar';

* passed to the indicate option of estout;
* each entry has the form row label = coefficient name pattern;

local indicators `"
	"Year FE = *year*"
	"Ethnicity combination FE (15 categories) = *eth_gp*"
	"Occupation FE (3-digit ANZSCO 2006) = *occ3d_HLFS*"
	"Regional Council FE = *rc_HLFS*"
	"Industry FE (4-digit ANZSIC) = *ind4u*"
	"';
	



*** regression controls;
* the sets are cumulative: each one starts from the previous set and adds to it;
* c6 no longer repeats age and age2 because they already enter through c4;

local c2 "female lnhrs_main_HLFS i.year";
local c3 "`c2' i.hqual i.rc_HLFS i.hhcomp_HLFS i.numkid_u18_cat i.eth_gp"; 
local c4 "`c3' i.occ3d_HLFS age age2"; 
local c6 "`c4' wkd_hpp_1ya wkd_hpp_2ya lnL_hc KL_rat val_ad_pw hp_ffe_m i.ind4u";


*** heterogeneity variable lists, one per dimension;
* each list holds the female interactions followed by the matching main effects;
* every trailing comment is closed with the delimiter so it cannot absorb later commands;

local het1 "age1_f age3_f age4_f i.age_cat"; * age heterogeneity variables;
local het2 "ten1_f ten2_f wkd_hpp_1ya wkd_hpp_2ya"; * heterogeneity by tenure;
local het3 "eth_mao_f eth_pac_f eth_asi_f eth_eur_mao_f eth_othpl_f i.eth_gp"; * het by ethnic combination, european only omitted;
local het4 "hqual_1_f hqual_2_f hqual_3_f  i.hqual"; * heterogeneity by highest qualification;
local het5 "hhcomp_cou_f hhcomp_sngch_f hhcomp_couch_f hhcomp_unid_f i.hhcomp_HLFS"; * het by hshold comp, omitted sing, no kids;
local het6 "numkid_u18_1_f numkid_u18_2_f numkid_u18_3_f i.numkid_u18_cat "; * het by number of children;
local het7 "lnL_hc_f lnL_hc  "; * het by firm size (ln head count);
local het8 "wfe_t_xtile_2_f wfe_t_xtile_3_f wfe_t_xtile_4_f i.wfe_t_xtile "; * het by CCK worker FE quartile;







********************************************************************************;
* running regs;
********************************************************************************;

* For each sample and each heterogeneity dimension, three regressions are run;
* with control sets c2, c4 and c6, and the stored results are appended as one;
* estout panel to the output text file;

* outer loop over the sample to be included;
foreach sample in Q_rest {;

	if "`sample'"=="Q_rest" {;
		local samplec "Q_rest==1";
		local samplel "Restricted consistent sample used for the rest of the analysis";
	};

	* remove output from any earlier run because estout appends below;
	capture erase "`outfile'_`sample'`suff'.txt";

	* middle loop over heterogeneity dimensions;
	forvalues dim = 1/8 {;

		* descriptive name of the current dimension;
		local hetl : word `dim' of
			"Age"
			"Tenure"
			"Ethnicity"
			"Highest qualification"
			"Household structure"
			"Number of children"
			"Firm size (head count)"
			"CCK worker FE quartile";

		eststo clear;
		local headers "";

		* inner loop over sets of regression controls;
		foreach spec in 2 4 6 {;
			reg `dvar' `het`dim'' `c`spec'' if `samplec', vce(cluster snz_uid);
			/* use next line to conf observation count;
			rndobs N, seed(0871525);
			*/
			eststo;
			* column header is the control set number in parentheses;
			local headers `" `headers' "(`spec')" "';
		};

		* one panel per dimension, appended to the same file;
		estout * using "`outfile'_`sample'`suff'.txt", append
			style(tab) 
			c(b(star fmt(%9.3f)) se(par))
			legend label collabels(, none)
			dropped ("dropped")
			drop(_cons )
			mlabels(`headers')
			varl(`labels')
			indicate(`indicators')
			order(female)
			starlevels(* 0.05 ** 0.01)
			stats(r2 N, 
				fmt(%9.3f %9.0fc ) 
			labels("R-Squared" Observations ))
			prehead("`=char(13)'`=char(13)'
			`=char(13)'*`sample'*****************************************************")
			varwidth(67) modelwidth(11) delimiter("");
	};

};

